package cn.xiaoniu.dmp.report

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Skeleton of a Spark driver that loads a comma-separated text file and keeps
  * only the records that have enough fields.
  *
  * NOTE(review): judging by the object name and the commented-out
  * `createDataFrame` call, the job is presumably meant to end by writing the
  * filtered records out as Parquet; that step is not implemented yet.
  * The application name, the two configuration entries and the input path are
  * still empty placeholders and must be filled in before the job is useful.
  */
object File2parquet {

  /** Minimum number of comma-separated fields a record must have to be kept. */
  private val MinFieldCount = 85

  /**
    * Entry point: builds a local Spark context, defines the filtered RDD and
    * always stops the context afterwards.
    *
    * @param args command-line arguments (currently unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("")
      .setMaster("local[*]")
      .set("", "") // serialization - TODO: key/value not filled in yet
      .set("", "") // compression - TODO: key/value not filled in yet

    val sc = new SparkContext(conf)
    try {
      val ssc = new SQLContext(sc)

      // TODO: the input path is still an empty placeholder.
      val file: RDD[String] = sc.textFile("")

      // Filter out records that are too short.
      // A negative limit makes split keep trailing empty strings; with the
      // default limit a record whose last columns are empty would be
      // under-counted and wrongly dropped by the field-count check.
      val rdd: RDD[Array[String]] =
        file.map(_.split(",", -1)).filter(_.length >= MinFieldCount)

      // ssc.createDataFrame()
    } finally {
      // Release the Spark context even if defining or running the job fails.
      sc.stop()
    }
  }
}
